library(tidyverse)
library(lubridate)
# library(rjson)
library(themebg)
# Load the most recent CSSE daily report and back-fill it with rows that only
# exist in the 03-30-2020 report, keeping records that carry a FIPS code.
data_dir <- "csse_covid_19_data/csse_covid_19_daily_reports"
f <- list.files(data_dir, pattern = "\\.csv$", full.names = TRUE)
if (length(f) == 0) {
  stop("No daily report CSV files found in ", data_dir, call. = FALSE)
}

# Reports are named MM-DD-YYYY.csv, so the alphabetical order returned by
# list.files() stops being chronological once the data spans more than one
# year. Parse the date from each file name and pick the newest one instead.
report_dates <- mdy(tools::file_path_sans_ext(basename(f)), quiet = TRUE)
if (all(is.na(report_dates))) {
  stop("No file named MM-DD-YYYY.csv found in ", data_dir, call. = FALSE)
}

curr <- read_csv(f[which.max(as.numeric(report_dates))]) %>%
  arrange(Country_Region, Province_State, Admin2)

# Reference report, read from the same directory as the latest one.
prev <- read_csv(file.path(data_dir, "03-30-2020.csv")) %>%
  arrange(Country_Region, Province_State, Admin2)

# Rows present in the reference report but absent from the latest report.
miss <- anti_join(prev, curr, by = "Combined_Key")

# Latest report plus the back-filled rows, restricted to rows with a FIPS code.
curr_us <- curr %>%
  bind_rows(miss) %>%
  filter(!is.na(FIPS))
# County boundaries as GeoJSON, downloaded from the plotly datasets repository
# and parsed into a nested list.
# Quick structure check: counties$features[[1]]$id
url <- "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
counties <- rjson::fromJSON(file = url)
# Read one wide global time-series CSV and reshape it to one row per
# location and date.
#   path       - CSV with `Province/State`, `Country/Region`, `Lat`, `Long`
#                and one column per date (header formatted as %m/%d/%y).
#   value_name - name of the output column that receives the daily values.
# Returns a tibble with columns state, country, lat, long, date (Date) and
# the column named by `value_name`.
read_global_ts <- function(path, value_name) {
  read_csv(path) %>%
    rename(
      state = `Province/State`,
      country = `Country/Region`,
      lat = Lat,
      long = Long
    ) %>%
    pivot_longer(
      cols = -c(state, country, lat, long),
      names_to = "date",
      values_to = value_name
    ) %>%
    mutate(date = as_date(date, format = "%m/%d/%y", tz = "UTC"))
}

df_confirmed <- read_global_ts(
  "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
  "confirmed"
)

df_deaths <- read_global_ts(
  "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
  "deaths"
)

df_recovered <- read_global_ts(
  "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
  "recovered"
)

# Join on the location/date identifiers only. Coordinates are floating-point
# values and are not guaranteed to be written identically in every file, so
# using them as join keys (which the implicit natural join did) can silently
# leave deaths/recovered as NA. lat/long are taken from the confirmed table.
df_ts <- df_confirmed %>%
  left_join(
    select(df_deaths, -lat, -long),
    by = c("state", "country", "date")
  ) %>%
  left_join(
    select(df_recovered, -lat, -long),
    by = c("state", "country", "date")
  )
# US confirmed counts: lower-case the header, shorten the identifier names,
# then turn every column outside the uid:key range into (date, confirmed) rows.
df_confirmed_us <-
  read_csv("csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv") %>%
  rename_all(str_to_lower) %>%
  rename(
    state = province_state,
    country = country_region,
    long = long_,
    key = combined_key
  ) %>%
  pivot_longer(
    cols = -(uid:key),
    names_to = "date",
    values_to = "confirmed"
  ) %>%
  mutate(date = as_date(date, format = "%m/%d/%y", tz = "UTC"))

# US deaths: same reshaping, but the identifier range extends through
# `population`, which must stay out of the pivot.
df_deaths_us <-
  read_csv("csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv") %>%
  rename_all(str_to_lower) %>%
  rename(
    state = province_state,
    country = country_region,
    long = long_,
    key = combined_key
  ) %>%
  pivot_longer(
    cols = -(uid:population),
    names_to = "date",
    values_to = "deaths"
  ) %>%
  mutate(date = as_date(date, format = "%m/%d/%y", tz = "UTC"))

# Combine both measures on their shared columns and add a character copy of
# the date (used as the animation frame label in the plots).
df_ts_us <- df_confirmed_us %>%
  left_join(df_deaths_us) %>%
  mutate(str_date = as.character(date))
# Country-level daily totals (summing over states/provinces, ignoring NA),
# plus active cases and day-over-day changes within each country.
# The result stays grouped by country.
df_country <- df_ts %>%
  group_by(country, date) %>%
  summarize(
    confirmed = sum(confirmed, na.rm = TRUE),
    deaths = sum(deaths, na.rm = TRUE),
    recovered = sum(recovered, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(active = confirmed - deaths - recovered) %>%
  group_by(country) %>%
  mutate(
    # Difference from the previous row of the same country; NA on the first.
    new_cases = confirmed - lag(confirmed),
    new_deaths = deaths - lag(deaths)
  )
# Earliest date on which each country had at least one confirmed case.
df_case1 <- df_country %>%
  group_by(country) %>%
  filter(confirmed > 0) %>%
  arrange(date, country) %>%
  # Rows are date-ordered, so the first row kept per country is the earliest.
  distinct(country, .keep_all = TRUE) %>%
  select(country, date_case1 = date)

# Earliest date on which each country had more than 50 confirmed cases.
df_case50 <- df_country %>%
  group_by(country) %>%
  filter(confirmed > 50) %>%
  arrange(date, country) %>%
  distinct(country, .keep_all = TRUE) %>%
  select(country, date_case50 = date)
# Country time series enriched with the milestone dates and the number of
# days elapsed since each milestone (negative before it, NA if never reached).
df_data <- df_country %>%
  # Name the key explicitly so the join cannot silently pick up any other
  # column the tables might come to share.
  left_join(df_case1, by = "country") %>%
  left_join(df_case50, by = "country") %>%
  mutate(
    day_case1 = as.numeric(difftime(date, date_case1, units = "days")),
    day_case50 = as.numeric(difftime(date, date_case50, units = "days")),
    # Flag used to highlight the US in the ggplot charts.
    usa = country == "US",
    # Character copy of the date, used as the animation frame label.
    str_date = as.character(date)
  )
# Most recent row per country: sort newest-first, keep the first row seen for
# each country, and drop the grouping so ranking runs over all countries.
latest_by_country <- df_data %>%
  arrange(desc(date), country) %>%
  distinct(country, .keep_all = TRUE) %>%
  ungroup()

# Countries with the highest confirmed counts on their latest date.
df_top25 <- top_n(latest_by_country, 25, confirmed)
df_top12 <- top_n(latest_by_country, 12, confirmed)
# Plot data: country/date rows with at least one confirmed case, plus the
# death rate in percent. The calculation is row-wise, so no grouping is
# needed (grouping by country and date only created one group per row).
df_plt <- df_data %>%
  ungroup() %>%
  filter(confirmed > 0) %>%
  mutate(death_rate = deaths / confirmed * 100)

df_plt_top12 <- semi_join(df_plt, df_top12, by = "country")

# Rows on or after the first confirmed case, overall and for the top countries.
df_plt_day1 <- filter(df_data, day_case1 >= 0)
df_plt_day1_top25 <- semi_join(df_plt_day1, df_top25, by = "country")
df_plt_day1_top12 <- semi_join(df_plt_day1, df_top12, by = "country")

# Rows on or after the date the country passed 50 confirmed cases.
df_plt_day50 <- filter(df_data, day_case50 >= 0)
df_plt_day50_top25 <- semi_join(df_plt_day50, df_top25, by = "country")
library(plotly)
# Cumulative confirmed cases over calendar time, one line per top-12 country.
plot_ly(
  df_plt_top12,
  x = ~date,
  y = ~confirmed,
  color = ~country,
  colors = "Paired"
) %>%
  add_lines(hovertext = ~country) %>%
  layout(showlegend = FALSE)
Confirmed cases by country
# Cumulative deaths over calendar time, one line per top-12 country.
plot_ly(
  df_plt_top12,
  x = ~date,
  y = ~deaths,
  color = ~country,
  colors = "Paired"
) %>%
  add_lines(hovertext = ~country) %>%
  layout(showlegend = FALSE)
Deaths by country
# Death rate (deaths as a percentage of confirmed cases) over calendar time.
plot_ly(
  df_plt_top12,
  x = ~date,
  y = ~death_rate,
  color = ~country,
  colors = "Paired"
) %>%
  add_lines(hovertext = ~country) %>%
  layout(showlegend = FALSE)
Death rate by country
# Cumulative confirmed cases against days since each country's first case.
plot_ly(
  df_plt_day1_top12,
  x = ~day_case1,
  y = ~confirmed,
  color = ~country,
  colors = "Paired"
) %>%
  add_lines(hovertext = ~country) %>%
  layout(showlegend = FALSE)
Confirmed cases since first case reported in each country
# Cumulative deaths against days since each country's first case.
plot_ly(
  df_plt_day1_top12,
  x = ~day_case1,
  y = ~deaths,
  color = ~country,
  colors = "Paired"
) %>%
  add_lines(hovertext = ~country) %>%
  layout(showlegend = FALSE)
Deaths since first case reported in each country
# Daily new confirmed cases as bars, coloured by country.
plot_ly(
  df_plt_top12,
  x = ~date,
  y = ~new_cases,
  color = ~country,
  colors = "Paired"
) %>%
  add_bars(hovertext = ~country) %>%
  layout(showlegend = FALSE)
New cases reported each day by country
# Daily new deaths as bars, coloured by country.
plot_ly(
  df_plt_top12,
  x = ~date,
  y = ~new_deaths,
  color = ~country,
  colors = "Paired"
) %>%
  add_bars(hovertext = ~country) %>%
  layout(showlegend = FALSE)
New deaths reported each day by country
# Smoothed daily new cases against days since first case. Line size is mapped
# to the `usa` flag so the US line can be drawn differently from the rest.
ggplot(
  df_plt_day1_top12,
  aes(x = day_case1, y = new_cases, color = country, size = usa)
) +
  geom_smooth(se = FALSE) +
  scale_color_brewer(NULL, palette = "Paired") +
  scale_size_discrete(NULL, range = c(0, 1.5)) +
  theme_bg() +
  theme(legend.position = "none")
# Smoothed daily new deaths against days since first case. Line size is mapped
# to the `usa` flag so the US line can be drawn differently from the rest.
ggplot(
  df_plt_day1_top12,
  aes(x = day_case1, y = new_deaths, color = country, size = usa)
) +
  geom_smooth(se = FALSE) +
  scale_color_brewer(NULL, palette = "Paired") +
  scale_size_discrete(NULL, range = c(0, 1.5)) +
  theme_bg() +
  theme(legend.position = "none")
# Animated world map: one marker per country (located by country name), sized
# by confirmed cases, with one animation frame per date string.
plot_ly(
  df_plt,
  type = "scattergeo",
  locationmode = "country names",
  locations = ~country,
  size = ~confirmed,
  frame = ~str_date,
  showlegend = FALSE
) %>%
  layout(
    geo = list(
      projection = list(type = "natural earth")
    )
  )
Confirmed cases by country over time
# County choropleth of confirmed cases from the latest daily report.
curr_us %>%
  # The plotly county GeoJSON identifies features by zero-padded, 5-character
  # FIPS strings (e.g. "01001"). A FIPS column parsed as a number loses its
  # leading zero, so normalise it before using it as the location key.
  mutate(
    fips_id = str_pad(
      as.character(as.integer(FIPS)),
      width = 5,
      side = "left",
      pad = "0"
    )
  ) %>%
  plot_ly(
    type = "choropleth",
    geojson = counties,
    # locationmode = "USA_states",
    locations = ~fips_id,
    text = ~Combined_Key,
    z = ~Confirmed,
    # Colour scale is capped at 100 cases.
    zmin = 0,
    zmax = 100,
    # hovertemplate = "%{Combined_Key}: %{Confirmed}",
    colorscale = "Reds",
    # frame = ~str_date,
    showlegend = FALSE
  ) %>%
  layout(geo = list(projection = list(type = "albers usa")))
Confirmed cases by US county
# County choropleth of deaths from the latest daily report.
curr_us %>%
  # The plotly county GeoJSON identifies features by zero-padded, 5-character
  # FIPS strings (e.g. "01001"). A FIPS column parsed as a number loses its
  # leading zero, so normalise it before using it as the location key.
  mutate(
    fips_id = str_pad(
      as.character(as.integer(FIPS)),
      width = 5,
      side = "left",
      pad = "0"
    )
  ) %>%
  plot_ly(
    type = "choropleth",
    geojson = counties,
    locations = ~fips_id,
    text = ~Combined_Key,
    z = ~Deaths,
    # Colour scale is capped at 10 deaths.
    zmin = 0,
    zmax = 10,
    colorscale = "Reds",
    # frame = ~str_date,
    showlegend = FALSE
  ) %>%
  layout(geo = list(projection = list(type = "albers usa")))
Deaths by US county
# df_ts_us %>%
# plot_ly(
# type = "scattergeo",
# locationmode = "country names",
# locations = ~country,
# size = ~confirmed,
# frame = ~str_date,
# showlegend = FALSE
# ) %>%
# layout(geo = list(projection = list(type = "natural earth")))
# Animated county choropleth of confirmed cases, one frame per date.
df_ts_us %>%
  # The plotly county GeoJSON identifies features by zero-padded, 5-character
  # FIPS strings (e.g. "01001"). A fips column parsed as a number loses its
  # leading zero, so normalise it before using it as the location key.
  # Missing fips values stay NA.
  mutate(
    fips_id = str_pad(
      as.character(as.integer(fips)),
      width = 5,
      side = "left",
      pad = "0"
    )
  ) %>%
  plot_ly(
    type = "choropleth",
    geojson = counties,
    # locationmode = "USA_states",
    locations = ~fips_id,
    text = ~key,
    z = ~confirmed,
    # Colour scale is capped at 100 cases.
    zmin = 0,
    zmax = 100,
    # hovertemplate = "%{Combined_Key}: %{Confirmed}",
    colorscale = "Reds",
    frame = ~str_date,
    showlegend = FALSE
  ) %>%
  layout(geo = list(projection = list(type = "albers usa")))